* clean additional data to use in analysis
**** OUTLINE ****
* prelims
* inflation (FRED)	
* aggregate UI and UA series	
* working age population
* subsidy geo incidence																   
* closing
********************************************************************************
* prelims																	   
********************************************************************************
{ 
clear all
cap log close
set more off
}
********************************************************************************
* inflation (FRED)																	   
********************************************************************************
{
* load data
import delimited "$orig/additional data/inflation/ESPCPHPLA01IXOBM.csv", encoding(ISO-8859-1) clear

* generate year
g year = substr(date,1,4)
destring year, force replace
drop date

* rename
rename espcphpla01ixobm_nbd20120101 infbase2012
drop if infbase2012==.

* label
la var year "year"
la var infbase2012 "Inflation Deflator (Base 2012)"

* store
order year
sort year
compress
save "$data/int/inflation.dta", replace

}
********************************************************************************
* aggregate UI and UA series
********************************************************************************
{
** ui and ua series
forval year = 2003(1)2017 {
* 2003-2009
if `year' <= 2009 {
	import excel "$orig/additional data/ui and ua annual series/prd02a_`year'.xls", sheet(" ") cellrange(A10:AB24) clear
}
* post-2009
else if `year' >=2010 & `year'<=2013 {
	import excel "$orig/additional data/ui and ua annual series/prd02_`year'.xls", sheet(" ") cellrange(A9:U22) clear
}
else if `year'==2014 {
	import excel "$orig/additional data/ui and ua annual series/PRD_`year'.xls", sheet("PRD-02") cellrange(A6:T20) clear
}
else if `year'>=2015 & `year'<2017 {
	import excel "$orig/additional data/ui and ua annual series/PRD_`year'.xls", sheet("PRD-03") cellrange(A6:K20) clear
}
else if `year'>=2017 & `year'<2019 {
	import excel "$orig/additional data/ui and ua annual series/PRD_`year'.xls", sheet("PRD-3") cellrange(A6:T20) clear
}
* keep and rename
rename A age
if `year' >= 2010 {
	rename B total
}
if (`year' < 2015 | `year'>2016) & `year'>=2010 {
	rename D ui
}

if `year' <=2005 {
	rename D total
	rename H ui
	rename L ua
	rename X rai
}
else if `year' > 2005 & `year'<2010 {
	rename D total
	rename H ui
	rename L ua
	rename AB rai
}
if `year'==2010 {
	rename K ua
	rename U rai
}
else if `year'>2010 & `year'<=2013 {
	rename J ua
	rename T rai
}
else if `year'==2014 {
	rename J ua
	rename R rai
}
else if `year'>=2015 & `year'<2017{
	rename C ui
	rename F ua
	rename J rai
	cap rename K pae
}

else if `year'>=2017 {
	rename J ua
	rename R rai
	cap rename T pae
}

if `year'<2015 {
	keep age* total* ui* ua* rai*
}
else if `year'>= 2015 {
	keep age* total* ui* ua* rai* pae*
}
if `year' < 2010 {
	drop if total == "" | total=="`year'"
}
else if `year' >= 2010 {
	drop if age == ""
}

replace age = "Total" if age=="AMBOS SEXOS"
replace age = "16-19" if regexm(age,"16")
replace age = "20-24" if regexm(age,"20")
replace age = "25-29" if regexm(age,"25")
replace age = "30-34" if regexm(age,"30")
replace age = "35-39" if regexm(age,"35")
replace age = "40-44" if regexm(age,"40")
replace age = "45-49" if regexm(age,"45")
replace age = "50-54" if regexm(age,"50")
replace age = "55-59" if regexm(age,"55")
replace age = "60+" if regexm(age,"60")

foreach var in total ui ua rai {
	destring `var', force replace
	replace `var' = round(`var')
}

cap destring pae, force replace
cap replace pae = round(pae)
g year = `year'
save "$data/temp/ui_ua_`year'.dta", replace
}

* put together
clear
forval year=2003(1)2017{
	append using "$data/temp/ui_ua_`year'.dta"
	erase "$data/temp/ui_ua_`year'.dta"
}
replace pae=0 if pae==.

* store
order year age
compress
save "$data/temp/ui_ua_series.dta", replace

** active population
import excel "$orig/additional data/pobactiva_0219.xlsx", sheet("Sheet1") allstring clear

g year = substr(A,1,4)
g quarter = substr(A,6,1)

drop A
rename B age_1000
rename C age_15
rename D age_20
rename E age_25
rename F age_30
rename G age_35
rename H age_40
rename I age_45
rename J age_50
rename K age_55
rename L age_60
rename M age_65
rename N age_70

destring year quarter age*, force replace
drop if year==.

* age aggs
g age_3549 = age_35 + age_40 + age_45
g age_5064 = age_50 + age_55 + age_60
g age_6000 = age_60 + age_65 + age_70

reshape long age_, i(year quarter) j(a)

g x=a+4
tostring a x, force replace

g age = a + "-" + x

replace age = "35-49" if a == "3549"
replace age = "50-64" if a == "5064"
replace age = "60+" if a == "6000"
replace age = "Total" if a == "1000"

drop a x

rename age_ act

* agg at year level
bys year age: egen actpop = mean(act * 1000)
duplicates drop year age, force
drop quarter act

format actpop %11.0g

replace age = "16-19" if age=="15-19"

* store
compress
save "$data/temp/pactiv.dta", replace

** add vars
* data
use "$data/temp/ui_ua_series.dta", clear
merge 1:1 year age using "$data/temp/pactiv.dta", keepusing(actpop)
drop if _m==2
drop _m
rename actpop pop

* count assistance level
g UA = (ua + rai + pae)/1000
g UI = ui/1000
replace pop=pop/1000
la var UI "Contributive Level (UI)"
la var UA "Assistance Level (UA)"

* proportion
g UA_prop = (UA/pop)*100
g UI_prop = (UI/pop)*100
la var UI_prop "Pct Pop in UI"
la var UA_prop "Pct Pop in UA"

* aggregate age groups
g agroup = .
replace agroup = 0 if age=="Total"
replace agroup = 1 if age=="16-19"
replace agroup = 2 if age=="20-24"
replace agroup = 3 if age=="25-29"
replace agroup = 4 if age=="30-34"
replace agroup = 5 if age=="35-39"
replace agroup = 6 if age=="40-44"
replace agroup = 7 if age=="45-49"
replace agroup = 8 if age=="50-54"
replace agroup = 9 if age=="55-59"
replace agroup = 10 if age=="60+"
la def agroup 1 "16-19" 2 "20-24" 3 "25-29" 4 "30-34" 5 "35-39" 6 "40-44" /*
	*/ 7 "45-49" 8 "50-54" 9 "55-59" 10 "60+" 0 "Total"
la val agroup agroup
order year agroup
g agroup2 = .
replace agroup2 = 0 if agroup>=5 & agroup<=7
replace agroup2 = 1 if agroup>=8 & agroup<=10
la def agroup2 0 "35-49" 1 "50+"
la val agroup2 agroup2

* aggregate magnitudes
	* level
bys year agroup2: egen UI_agroup2 = sum(UI)
bys year agroup2: egen UA_agroup2 = sum(UA)
	* share
bys year agroup2: egen UI_prop_agroup2 = sum(UI_prop)
bys year agroup2: egen UA_prop_agroup2 = sum(UA_prop)

* store
compress
save "$data/int/ui_ua_series.dta", replace

* erase temp files
cap erase "$data/temp/pactiv.dta"
cap erase "$data/temp/ui_ua_series.dta"

}
********************************************************************************
* working age population
******************************************************************************** 
{
** activity rates
* load raw INE data
import delimited "$orig/additional data/activityrates_0218.csv", delimiter(";") clear
drop if _n <= 5 

* province
gen province  = substr(v1,1,2)
replace province = "00" if province == "Na"
drop if province == "No" | province == "Lo" | province == "Fu" | province == "La"
destring province, replace

* quarter variables
forval year = 2002(1)2018 {
forval quarter = 1(1)4 {
gen t_y`year'q`quarter' = ""
gen y`year'q`quarter' = ""
}
}

forval year = 2002(1)2018 {
forval quarter = 1(1)4 {
forval var = 2(1)67 {
replace t_y`year'q`quarter' = "v`var'" if v`var'[1]== "`year'T`quarter'"
replace y`year'q`quarter' = v`var' if t_y`year'q`quarter' =="v`var'"
}
drop t_y`year'q`quarter'
}
}
drop v*
drop if _n == 1

forval year = 2002(1)2018 {
forval quarter = 1(1)4 {
destring y`year'q`quarter', replace 
}
}

drop y2018q3 y2018q4
reshape long y , i(prov) j(_quarter, str)
rename y activepoprate_prov 
gen str4 yoq = substr(_q, 1, 4)
gen str2 qoy = substr(_q, 6, 1)
destring yoq, replace
destring qoy, replace
g yearq = yq(yoq, qoy)
format yearq %tq

* store data
save "$data/temp/activityrates_0218.dta", replace
	
** active population
* load raw INE data
import delimited "$orig/additional data/ui and ua annual series/activos0218.csv", delimiter(";") clear
drop if _n <= 5 

gen province  = substr(v1,1,2)
replace province = "00" if province == "Na"
drop if province == "No" | province == "Lo" | province == "Fu" | province == "La"
destring province, replace

forval year = 2002(1)2018 {
forval quarter = 1(1)4 {
gen t_y`year'q`quarter' = ""
gen y`year'q`quarter' = ""
}
}

forval year = 2002(1)2018 {
forval quarter = 1(1)4 {
forval var = 2(1)67 {
replace t_y`year'q`quarter' = "v`var'" if v`var'[1]== "`year'T`quarter'"
replace y`year'q`quarter' = v`var' if t_y`year'q`quarter' =="v`var'"
}
drop t_y`year'q`quarter'
}
}

drop v*
drop if _n == 1

forval year = 2002(1)2018 {
forval quarter = 1(1)4 {
destring y`year'q`quarter', replace 
}
}

drop y2018q3 y2018q4
reshape long y , i(province) j(_quarter, str)
rename y activepop_prov 
gen str4 yoq = substr(_q, 1, 4)
gen str2 qoy = substr(_q, 6, 1)
destring yoq, replace
destring qoy, replace
g yearq = yq(yoq, qoy)
format yearq %tq

* add activity rates
merge 1:1 province _quarter using "$data/temp/activityrates_0218.dta"
drop _merge

* generate working age population
g wapop = activepop_prov/activepoprate_prov

* store
compress
save "$data/temp/wapop.dta", replace

}
********************************************************************************
* subsidy geo incidence																   
********************************************************************************
{
	
* data
use "$data/mcvl2004-17_02affi.dta", clear
duplicates drop id affstart affend, force
g x = (affstart <= 20120601 & affend >= 20120601)
keep if x==1
drop x
egen tag = tag(id)
keep if tag==1
drop tag

* individuals in subsidy
g subsidy = (reltype == 753 | reltype == 754)

* add residence information
merge 1:1 id using "$data/mcvl2004-17_01pers.dta"
keep if _merge==3
drop _merge
g residence = residence2012
replace residence = residence2013 if residence==.
replace residence = residence2011 if residence==.
replace residence = residence2014 if residence==.
g proresidence = round(residence/1000)

* relevant variables
keep id affstart affend subsidy proresidence
drop affstart affend
rename proresidence province

* keep people in the subsidy
keep if subsidy==1
drop subsidy
g count=1

* collapse at province level
collapse (sum) count, by(prov)
drop if prov==0|prov>52

* get proportion
egen x = sum(count)
g ShrInProv = count/x
drop x

* add working age population in each province in 2012
g yoq=2012
g qoy=1
merge m:1 province yoq qoy using "$data/temp/wapop.dta", keepusing(wapop)
keep if _merge==3
drop _merge

* fig in 1000s
replace wapop = wapop * 100000

* according to official data, there were 367339 people in the subsidy in 2012
g PplSUB = 367339
* apply same distribution
g PplSUBProv = round(ShrInProv * PplSUB)

* share of wapop of subsidy in province
g ShrPplSUBProv = round((PplSUBProv / wapop) * 100,.01)
la var ShrPplSUBProv "Share in Province"

* prov to string
rename province prov
tostring prov, g(province)
replace province = "0" + province if length(province)==1

* store
compress
save "$data/int/GeoDistSUB.dta", replace

* erase temp files
cap erase "$data/temp/wapop.dta"
cap erase "$data/temp/activityrates_0218.dta"

}
********************************************************************************
* closing																	   
********************************************************************************
{
cap log close
clear
}
